library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Read the data set from cps.csv and preview its first rows.
df <- read.csv("cps.csv")
head(df)
# Report the dimensions of the data frame: rows first, then columns.
cat("Number of instances : ", dim(df)[1])
## Number of instances : 534
cat("\nNumber of attributes : ", dim(df)[2])
##
## Number of attributes : 11
# Print each column's type together with its leading values.
str(df)
## 'data.frame': 534 obs. of 11 variables:
## $ wage : num 9 5.5 3.8 10.5 15 9 9.57 15 11 5 ...
## $ educ : int 10 12 12 12 12 16 12 14 8 12 ...
## $ race : chr "W" "W" "W" "W" ...
## $ sex : chr "M" "M" "F" "F" ...
## $ hispanic: chr "NH" "NH" "NH" "NH" ...
## $ south : chr "NS" "NS" "NS" "NS" ...
## $ married : chr "Married" "Married" "Single" "Married" ...
## $ exper : int 27 20 4 29 40 27 5 22 42 14 ...
## $ union : chr "Not" "Not" "Not" "Not" ...
## $ age : int 43 38 22 47 58 49 23 42 56 32 ...
## $ sector : chr "const" "sales" "sales" "clerical" ...
## Kernel Density Plot
# Smoothed distribution of the `wage` column of `df`, drawn as a filled curve.
density_plot <- ggplot(df, aes(x = wage)) +
  geom_density(fill = "indianred3") +
  labs(
    x = "wage",
    y = "density",
    # The title names the variable that is actually plotted (wage).
    title = "Kernel density of wage"
  )
# Evaluating the object at top level renders the plot.
density_plot
## Bar-plot
# Interactive bar chart with race on the x axis and wage on the y axis.
bar_plot <- plot_ly(
  df,
  type = "bar",
  x = ~race,
  y = ~wage
)
# Evaluating the object at top level renders the widget.
bar_plot
# Pearson correlation coefficient between wage and age, printed with cat().
x <- df[["wage"]]  # numeric
y <- df[["age"]]   # integer
cat(cor(x = x, y = y, method = "pearson"))
## 0.1769669
## Bar-plot
# Wage by marital status, with one bar colour per sex.
bar_plot <- plot_ly(
  data = df,
  x = ~married,
  y = ~wage,
  color = ~sex,
  type = "bar"
)
# Captions are supplied through `title`: a plotly axis has no `text`
# attribute, so captions passed that way are never drawn. The labels name
# the variables that are actually mapped (married, wage, sex).
bar_plot %>% layout(
  title = list(text = "Wage by marital status and gender"),
  legend = list(title = list(text = "Gender")),
  xaxis = list(title = list(text = "Marital status")),
  yaxis = list(title = list(text = "Wage"))
)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Differentiate the species and also show the variation in sepal length in the graph
# Scatter plot of sepal width against sepal length for the iris data:
# point colour follows sepal length and point shape follows species.
df2 <- iris
plot <- ggplot(
  data = df2,
  mapping = aes(
    x = Sepal.Length,
    y = Sepal.Width,
    color = Sepal.Length,
    shape = Species
  )
) +
  geom_point() +
  labs(
    title = "Sepal Length and Sepal Width",
    x = "Sepal Length",
    y = "Sepal Width"
  )
# Evaluating the object at top level renders the plot.
plot
# Pairwise plot matrix of the first four columns of df2, coloured by
# species, with the upper triangle left blank.
ggpairs(
  df2,
  mapping = aes(color = Species),
  columns = 1:4,
  upper = "blank"
) +
  ggtitle("IRIS")
df4 <- iris
# mutate() returns a new data frame and leaves its input untouched, so the
# result is assigned back; otherwise `ratio` would be missing from head().
df4 <- df4 %>% mutate(ratio = Sepal.Length / Petal.Length)
head(df4)
# Print every column except Species.
df4 %>% select(-Species)
# Print the rows whose sepal width exceeds 3.5.
df4 %>% filter(Sepal.Width > 3.5)